#######################################
# Download a single joke page and print the extracted joke text.
#
# The page is fetched with wget, converted from GBK to UTF-8, stripped of
# CR line endings, and then filtered: up to 10 lines following a
# '<div class="con-txt">' or 'pre-wrap' marker are kept, everything from
# the first '<' on each line is cut away, blank lines are dropped and the
# rest is passed through the sed script `sed_rules` (looked up relative to
# the current working directory).
#
# Arguments: $1 - URL of the joke page
# Outputs:   extracted joke lines on stdout
# Returns:   non-zero if a temp file cannot be created or the download
#            fails; otherwise the status of the final sed stage
#######################################
get_one() {
    local url=$1
    local raw page status

    # Private scratch files: fixed names in the working directory would be
    # shared with (and overwritten under) any caller that uses the same names
    # while it is still reading them.
    raw=$(mktemp) || return 1
    page=$(mktemp) || { rm -f -- "$raw"; return 1; }

    if ! wget "$url" -O "$raw"; then
        rm -f -- "$raw" "$page"
        return 1
    fi

    # Conversion is best effort: whatever iconv managed to convert is still
    # parsed, so a stray invalid byte does not discard the whole page.
    iconv -f GBK -t UTF-8 "$raw" -o "$page"
    dos2unix "$page"

    grep -A 10 -E '<div class="con-txt">|pre-wrap' "$page" \
        | grep -v pre-wrap \
        | cut -d '<' -f 1 \
        | grep -Ev '^[ ]*$' \
        | sed -f sed_rules
    status=$?

    rm -f -- "$raw" "$page"
    return "$status"
}
#######################################
# Download a listing page, follow every joke link on it and append the
# jokes to $joke_file, one joke per line.
#
# Links are taken from listing lines that contain "lengxiaohua", "href"
# and "span" but not "class"; the link target is the text between the
# first pair of double quotes. Each target is appended to the site prefix
# and fetched with get_one. The (possibly multi-line) result is flattened
# to a single space-separated line.
#
# Globals:   joke_file (read) - file the jokes are appended to
# Arguments: $1 - URL of the listing page
# Returns:   non-zero if joke_file is empty/unset, a temp file cannot be
#            created or the listing download fails; otherwise the status
#            of the final grep (non-zero when no joke was written)
#######################################
get_joke() {
    local url=$1
    local site="http://www.jijidi.com/"
    local raw listing page joke status
    local -a words
    local IFS=$' \t\n'

    if [[ -z ${joke_file:-} ]]; then
        printf 'get_joke: joke_file is not set\n' >&2
        return 1
    fi

    # Private temp files: get_one is invoked while the listing is still
    # being read, so the listing must not live in a shared, fixed-name file.
    raw=$(mktemp) || return 1
    listing=$(mktemp) || { rm -f -- "$raw"; return 1; }

    if ! wget "$url" -O "$raw"; then
        rm -f -- "$raw" "$listing"
        return 1
    fi

    # Best-effort conversion; whatever was converted is still parsed.
    iconv -f GBK -t UTF-8 "$raw" -o "$listing"
    dos2unix "$listing"

    grep "lengxiaohua" "$listing" \
        | grep href \
        | grep span \
        | grep -v class \
        | cut -d '"' -f 2 \
        | grep -Ev '^$' \
        | while read -r page; do
            joke=$(get_one "$site$page" </dev/null)
            # Flatten the joke onto one line by splitting on whitespace and
            # re-joining with single spaces. Splitting with `read -a` rather
            # than an unquoted expansion keeps characters such as `*` and `?`
            # literal instead of expanding them against the working directory.
            read -r -d '' -a words <<<"$joke" || true
            printf '%s\n' "${words[*]}"
        done \
        | grep -Ev '^$' >> "$joke_file"
    status=$?

    rm -f -- "$raw" "$listing"
    return "$status"
}

#######################################
# Download a listing page and append the short jokes found directly on it
# to $joke_file, one per line.
#
# Lines containing "div", "baidu" or "class" are discarded first. From the
# remainder, every line containing "a href" and the line after it are kept.
# For each kept line the text between the first '>' and the next '<' is
# extracted, blank results are dropped and the rest is passed through the
# sed script `sed_rules` (looked up relative to the current working
# directory). Runs of whitespace inside a line are collapsed to one space.
#
# Alternative extraction for "rank-txt" style pages, kept for reference:
# replace the first two stages with
#     grep -A1 rank-txt | grep -Ev "align|rank-txt"
#
# Globals:   joke_file (read) - file the jokes are appended to
# Arguments: $1 - URL of the listing page
# Returns:   non-zero if joke_file is empty/unset, a temp file cannot be
#            created or the download fails; otherwise the status of the
#            final grep (non-zero when no joke was written)
#######################################
simple_get_joke() {
    local url=$1
    local raw page status
    local -a words
    local IFS=$' \t\n'

    if [[ -z ${joke_file:-} ]]; then
        printf 'simple_get_joke: joke_file is not set\n' >&2
        return 1
    fi

    # Private temp files instead of fixed names in the working directory.
    raw=$(mktemp) || return 1
    page=$(mktemp) || { rm -f -- "$raw"; return 1; }

    if ! wget "$url" -O "$raw"; then
        rm -f -- "$raw" "$page"
        return 1
    fi

    # Best-effort conversion; whatever was converted is still parsed.
    iconv -f GBK -t UTF-8 "$raw" -o "$page"
    dos2unix "$page"

    # `grep -v -e '--'` drops grep's "--" context separators (and any other
    # line containing "--") without relying on a backslash-escaped hyphen.
    # `read -a` + "${words[*]}" trims and collapses whitespace while keeping
    # characters such as `*` and `?` literal.
    grep -vE 'div|baidu|class' "$page" \
        | grep -A1 "a href" \
        | grep -v -e '--' \
        | cut -d '>' -f 2 \
        | cut -d '<' -f 1 \
        | grep -Ev '^[ ]*$' \
        | sed -f sed_rules \
        | while read -r -a words; do
            printf '%s\n' "${words[*]}"
        done \
        | grep -Ev '^$' >> "$joke_file"
    status=$?

    rm -f -- "$raw" "$page"
    return "$status"
}

# File that the scraping functions append to; de-duplicated at the end.
joke_file=/data/jokes/lengxh_new2.txt

# Example of a single listing URL:
#url="http://www.jijidi.com/lengxiaohua/list_5_1.html"

# Walk listing pages 200 through 300 (inclusive) and harvest each one.
for ((i = 200; i <= 300; i++)); do
    url="http://www.jijidi.com/lengxiaohua/list_5_${i}.html"
    # Alternative listing URLs / harvesting function, kept for reference:
    #url="http://www.jijidi.com/tag/nahanxiaohua_10099_$i.html"
    #url=http://www.jijidi.com/meinvxiaohua/index_3_$i.html
    #get_joke $url
    simple_get_joke "$url"
done

# Sort the collected jokes and drop duplicates, rewriting the file in place.
sort -u -o "$joke_file" "$joke_file"
